***** STATA version 16.1
* Creates tables and figures using data from the Hong Kong Election Study (HKES)
* data source: Hong Kong Election Study. (2019). URL: https://hkelectionstudy.org/
* data version: hkesCoreData202002.dta
* 
* 
* tables: 2, C1, F1, F2, F3
* figures: 3

* Session setup: start from an empty session, pin the interpreter version,
* move to the project folder, make sure the output folders exist and load the data.
clear all
* the header states this script was written for Stata 16.1; pin that version
* so that newer releases interpret the commands the same way
version 16.1
set more off

***** additional packages required to run the code, uncomment to install
// ssc install blindschemes, replace
// ssc install mplotoffset, replace
// ssc install estout, replace

cd "/path/to/current/directory/"

* tables and figures are written to these sub-folders further down;
* create them if they are absent (capture: no error when they already exist)
capture mkdir "tables"
capture mkdir "figures"

set scheme plotplain

use "hkesCoreData202002.dta", clear

***** Data cleaning
* demographic variables: give the raw survey items descriptive names in one pass
rename (QA1 QA2 QA5 QB4 QB10 QC3) (gender age gc income education property_owner)

* gc: overwrite with QD1 wherever QD1 is neither system-missing nor equal to 81
replace gc = QD1 if !inlist(QD1, ., 81)

* education: QB11 takes precedence whenever it is not system-missing,
* then category 6 is folded into category 5
replace education = QB11 if !(QB11==.)
recode education (6=5)

* property ownership: variable label and the 0/1 value-label definition
* NOTE(review): the definition is stored under the name QC3 and is not attached here with
* label values; presumably the dataset already attaches a label of that name - confirm
label variable property_owner "Property Owner"
label define QC3 0 "Non-property owner" 1 "Property Owner", replace

*** elections and vote choice variables
label variable election "Election"
label define election 2 "General election" 4 "By-election", replace

* recode vote choices into camps
* every constituency-specific vote item is mapped onto this common coding
label define camp 1 "Establishment" 2 "Moderate" 3 "Radical"  4 "Abstain" 5 "Other" 6 "Missing"

* vote choice in Mar 2018
* one recoded variable per constituency item (QD4, QD5, QD6), then merged into vote201803;
* in every item 82/85/99 become 6 (Missing) and 83/84 become 4 (Abstain)
recode QD4 (1=2) (2=5) (3=5) (4=1) (82 85 99=6) (83 84=4), generate(vote201803gc1)
recode QD5 (1=2) (2=1) (3=5) (82 85 99=6) (83 84=4), generate(vote201803gc2)
recode QD6 (1=2) (2=1) (3=1) (4=1) (5=5) (6=2) (82 85 99=6) (83 84=4), generate(vote201803gc5)
generate vote201803 = .
foreach i in 1 2 5 {
	replace vote201803 = vote201803gc`i' if vote201803gc`i'!=.
}
label values vote201803gc1 vote201803gc2 vote201803gc5 vote201803 camp

* vote choice in Sep 2016
* same approach for the five constituency items QD7-QD11
recode QD7 (4 5 6 14=1) (1 2 3 10 15=2) (7 8 11=3) (9 12 13=5) (82 85 99=6) (83 84=4), generate(vote201609gc1)
recode QD8 (4 5 14 15=1) (1 2 3 12=2) (6 7 8 9 10=3) (11 13=5) (82 85 99=6) (83 84=4), generate(vote201609gc2)
recode QD9 (4 5 6 12=1) (1 2 3 8 10=2) (7 9 11=3) (82 85 99=6) (83 84=4), generate(vote201609gc3)
recode QD10 (6 7 8 9 10 14 20=1) (1 2 3 4 5=2) (11 12 13 15 16 19=3) (17 18=5) (82 85 99=6) (83 84=4), generate(vote201609gc4)
* NOTE(review): codes 17 and 18 are sent to 6 (Missing) here, whereas the comparable rules for
* the other constituencies send their residual lists to 5 (Other). This changes whether those
* respondents are dropped from or counted as 0 in the voted outcome - confirm against the codebook.
recode QD11 (6 7 8 9 10 16 19 20 21=1) (1 2 3 4 5=2) (11 12 13 14 15 22=3) (17 18=6) (82 85 99=6) (83 84=4), generate(vote201609gc5)
generate vote201609 = .
foreach i in 1 2 3 4 5 {
	replace vote201609 = vote201609gc`i' if vote201609gc`i'!=.
}
* attach the camp value label to the 2016 variables as well, mirroring the 2018 block
label values vote201609gc1 vote201609gc2 vote201609gc3 vote201609gc4 vote201609gc5 vote201609 camp

* recode political parties the respondents feel closest to by camp
* two versions are built from QI3; they differ only in where party code 207 is placed
* (camp 3 in the main version, camp 2 in the alternative); 191, 291 and 999 become missing
recode QI3 (104 105 108 110 113 114 = 1) ///
	(201 202 203 206 209 212 = 2) ///
	(207 211 214 215 216 217 = 3) ///
	(191 291 999 = .), generate(camp) // code LSD as radical
recode QI3 (104 105 108 110 113 114 = 1) ///
	(201 202 203 206 207 209 212 = 2) ///
	(211 214 215 216 217 = 3) ///
	(191 291 999 = .), generate(camp_alt) // code LSD as moderate
label values camp camp_alt camp

*** additional covariates
* camp approval
* values above 10 are set to missing; since missing compares as larger than any number
* in Stata, already-missing entries are simply rewritten as system-missing
foreach var in QJ9 QJ10 QJ11 {
    replace `var' = . if `var' >10
}

rename QJ9 establishment_approval
rename QJ10 democratic_approval
rename QJ11 localist_approval

label variable establishment_approval "Establishment camp leader approval"
label variable democratic_approval "Democratic camp leader approval"
label variable localist_approval "Localist camp leader approval"

* identity
* codes 91 and 97 are treated as missing
* (the variable name is now spelled out in full: the earlier identit only worked
* through variable-name abbreviation and fails when varabbrev is switched off)
rename QI1 identity
replace identity = . if identity==91 | identity==97


*** dependent variables
* voted: 1 if the respondent voted for his/her own side, 0 otherwise
*   - camp 2 or 3 (moderate/radical): a vote coded 2 or 3 counts
*   - camp 1 (establishment): only a vote coded 1 counts
* election==2 rows are scored on the Sep 2016 vote, election==4 rows on the Mar 2018 vote.
* The statements below overwrite one another, so their order matters.
generate voted = .
replace voted = inlist(vote201609, 2, 3) if inlist(camp, 2, 3) & election==2
replace voted = inlist(vote201803, 2, 3) if inlist(camp, 2, 3) & election==4
replace voted = (vote201609==1) if camp==1 & election==2
replace voted = (vote201803==1) if camp==1 & election==4
* abstainers (vote code 4) in the relevant election are 0, whatever their camp
replace voted = 0 if (election==2 & vote201609==4) | (election==4 & vote201803==4)
* a vote code of 6 (Missing) in either election blanks the outcome
replace voted = . if inlist(6, vote201609, vote201803)
label variable voted "Voted for own camp"

* fairness of voting
* values above 10 (which in Stata includes missing values) are stored as system-missing
rename QD15 fairness
replace fairness = . if fairness>10
label variable fairness "Perceived Fairness of Elections"

* importance of voting
* rescale 3 different scales into 0-1
*   QE4:                       (x-1)/4
*   QE5, election 3 or 4:      (x-1)/9
*   QE5, election 5:           x/10
* the QE5 versions overwrite the QE4 version where their conditions hold
generate importance = (QE4-1)/4 if QE4<=10
replace importance = (QE5-1)/9 if inlist(election, 3, 4) & QE5<=10
replace importance = QE5/10 if election==5 & QE5<=10
label variable importance "Importance of voting"


***** restrict sample
* keep all election==4 rows, plus election==2 rows with prepost equal to 2
* (presumably the post-election wave - confirm against the codebook)
keep if election==4 | (election==2 & prepost==2)


***** Descriptive statistics
* table C1
* means and standard deviations by election, for respondents with camp 1, 2 or 3

* one 0/1 indicator per category of each categorical variable (f_<name>1, f_<name>2, ...)
foreach var of varlist camp gender education income {
    tabulate `var', gen(f_`var')
}

* rows of the table, shared by both columns
* (f_education6 is listed first so that it precedes the other education rows;
* NOTE(review): the row labels below assume the order in which tabulate numbered the categories)
local c1_rows f_camp1 f_camp2 f_camp3 voted fairness importance f_gender* age ///
	f_education6 f_education1-f_education5 f_income* property_owner

eststo ge: estpost summarize `c1_rows' if (camp==1|camp==2|camp==3) & election==2
eststo be: estpost summarize `c1_rows' if (camp==1|camp==2|camp==3) & election==4

* the stray trailing apostrophe in the Master's/PhD row label has been removed
esttab ge be using "tables/table C1.rtf", rtf cells("mean(fmt(2)) sd(fmt(2))") label mtitle("General election" "By-election") ///
	refcat(f_camp1 "Camp" f_gender1 "Gender" f_education6 "Education" f_income1 "Income", nolabel) ///
	coeflabel(f_camp1 "Establishment" f_camp2 " Moderate" f_camp3 " Radical" f_gender1 " Male" f_gender2 " Female" age "Age" ///
		f_education6 " No formal education" f_education1 " Elementary" f_education2 " Secondary" ///
		f_education3 " Associate degree/Diploma" f_education4 " University education" f_education5 " Master's/PhD degree" ///
		f_income1 " HK$ 9,999 or less" f_income2 " HK$ 10,000 - 19,999" f_income3 " HK$ 20,000 - 29,999" f_income4 " HK$ 30,000 - 39,999" ///
		f_income5 " HK$ 40,000 - 49,999" f_income6 " HK$ 50,000 - 59,999" f_income7 " HK$ 60,000 - 79,999" f_income8 " HK$ 80,000 or above") ///
	main(mean 6.2f) ///
	replace  



***** main result
* table 2 and figure 3
* OLS of each outcome on camp x election with demographic controls and robust SEs;
* each model is stored for the table and its predicted values are plotted as one panel

* panel titles, looked up by outcome name inside the loop
local ttl_voted "Voted for own camp"
local ttl_fairness "Fairness of elections"
local ttl_importance "Importance of voting"

* demographic controls common to all three models
local demog i.gender c.age##c.age i.education i.income i.property_owner

* plot colours for camp 2 and camp 3
local gold 229 178 0
local dkgold 229 178 0 *2

* names of the stored models / graphs, collected in loop order
local models
local panels

foreach var in voted fairness importance {
	regress `var' i.camp##i.election `demog', vce(robust)
	quietly estadd local control "Yes", replace
	estimates store `var'_cov
	local models `models' `var'_cov

	* election-4 effect within camp 2 and within camp 3, then their difference
	foreach k in 2 3 {
		test 4.election + `k'.camp#4.election = 0
	}
	test 2.camp#4.election - 3.camp#4.election = 0

	* predicted values for every camp-by-election cell
	margins camp#election
	mplotoffset, x(election) xscale(range(1 5)) xlabel(,labsize(*1.5)) ///
		title("`ttl_`var''", size(large)) ///
		ytitle("Predicted value", size(large)) xtitle("") ///
		plot1opts(msymbol(O) mcolor(ebblue) lpattern(".") lcolor(ebblue)) ///
		ci1opts(color(ebblue))  ///
		plot2opts(msymbol(O) mcolor("`gold'") lpattern(solid) lcolor("`gold'")) ///
		ci2opts(color("`gold'")) ///
		plot3opts(msymbol(D) mcolor("`dkgold'") lpattern("-") lcolor("`dkgold'")) ///
		ci3opts(color("`dkgold'")) ///
		legend(position(6) row(1)) name(`var'_cov, replace)
	local panels `panels' `var'_cov
}

* figure 3: the three panels side by side
graph combine `panels', name(combined_cov, replace) row(1) xsize(8) ysize(4)
graph export "figures/figure 3.png", replace height(1200) width(2400)

* table 2: camp, election and interaction terms only; the controls are dropped from the display
esttab `models' using "tables/table 2.rtf", ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	b(%9.3f) se(%9.3f) ///
	nobaselevels nonote nogaps ///
	drop(age c.age#c.age *.gender *.education *.income *.property_owner) ///
	refcat(2.camp "(ref: Establishment)", nolabel) ///
	stats(control N, label("Demographic controls" "Observations") fmt(%9.0f)) ///
	addnote("Robust standard errors in parentheses" "+ p< 0.10, * p < 0.05, ** p < 0.01, *** p < 0.001") ///
	label interaction(" x ") replace


	
***** logit/ologit as robustness check
* table F1

* note: HKES used different scales to measure the importance of voting in different rounds of
* the survey, therefore the rescaled 0-1 measure is transformed to a binary variable:
* 1 if above the mid-point of the scale (> 0.5), 0 otherwise.
* The if-qualifier keeps respondents without a valid importance value missing; writing the
* missing check inside the expression (importance>.5 & importance!=.) would code them as 0
* and silently add them to the estimation sample.
generate importance_b = importance>.5 if importance!=.
label variable importance_b "Importance of voting"

* right-hand side shared by all three models
local rhs i.camp##i.election i.gender c.age##c.age i.education i.income i.property_owner

* binary outcomes: logit
foreach var in importance_b voted {
	logit `var' `rhs', vce(robust)
	quietly estadd local control "Yes", replace
	estimates store `var'_logit_cov
	test (4.election + 2.camp#4.election==0)
	test (4.election + 3.camp#4.election==0)
}

* ordered outcome: ordered logit
foreach var in fairness {
	ologit `var' `rhs', vce(robust)
	quietly estadd local control "Yes", replace
	estimates store `var'_ologit_cov
	test (4.election + 2.camp#4.election==0)
	test (4.election + 3.camp#4.election==0)
}

esttab voted_logit_cov fairness_ologit_cov importance_b_logit_cov using "tables/table F1.rtf", ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) b(%9.3f) se(%9.3f) nobaselevels nonote nogaps ///
	drop(age c.age#c.age *.gender *.education *.income *.property_owner) ///
	refcat(2.camp "(ref: Establishment)", nolabel) ///
	stats(control N, ///
		label("Demographic controls" "Observations") fmt(%9.0f)) ///
	addnote("Robust standard errors in parentheses" "+ p< 0.10, * p < 0.05, ** p < 0.01, *** p < 0.001") ///
	label interaction(" x ") replace
	

***** replication with LSD classified as moderate
* table F2
* same OLS specification as the main models, with camp_alt in place of camp

foreach var in voted fairness importance {
	regress `var' i.camp_alt##i.election i.gender c.age##c.age i.education i.income i.property_owner, vce(robust)
	quietly estadd local control "Yes", replace
	estimates store `var'_cov_alt
	test (4.election + 2.camp_alt#4.election==0)
	test (4.election + 3.camp_alt#4.election==0)
	* both interaction terms must come from camp_alt: camp is not part of this model,
	* so a reference to 2.camp#4.election cannot be resolved by test
	test (2.camp_alt#4.election - 3.camp_alt#4.election==0)
}

* refcat is keyed on 2.camp_alt so that the reference-category row is actually inserted
esttab voted_cov_alt fairness_cov_alt importance_cov_alt using "tables/table F2.rtf", ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) b(%9.3f) se(%9.3f) nobaselevels nonote nogaps ///
	drop(age c.age#c.age *.gender *.education *.income *.property_owner) ///
	refcat(2.camp_alt "(ref: Establishment)", nolabel) ///
	stats(control N, ///
		label("Demographic controls" "Observations") fmt(%9.0f)) ///
	addnote("Robust standard errors in parentheses" "+ p< 0.10, * p < 0.05, ** p < 0.01, *** p < 0.001") ///
	label interaction(" x ") replace

	
***** additional controls: camp leader support, social identity
* table F3
* main OLS specification plus the three camp-leader approval ratings and identity

* extra covariates, followed by the demographic controls used throughout
local extra c.establishment_approval c.democratic_approval c.localist_approval i.identity
local demog i.gender c.age##c.age i.education i.income i.property_owner

local models
foreach var in voted fairness importance {
	regress `var' i.camp##i.election `extra' `demog', vce(robust)
	quietly estadd local control "Yes", replace
	estimates store `var'_cov2
	local models `models' `var'_cov2

	* election-4 effect within camp 2 and within camp 3, then their difference
	foreach k in 2 3 {
		test 4.election + `k'.camp#4.election = 0
	}
	test 2.camp#4.election - 3.camp#4.election = 0
}

* NOTE(review): the table note below says importance is turned into a binary variable,
* but the model above uses the continuous 0-1 importance measure (the binary version is
* importance_b) - confirm which is intended and align the note or the outcome
esttab `models' using "tables/table F3.rtf", ///
	star(+ 0.10 * 0.05 ** 0.01 *** 0.001) ///
	b(%9.3f) se(%9.3f) ///
	nobaselevels nonote nogaps ///
	drop(age c.age#c.age *.gender *.education *.income *.property_owner) ///
	refcat(2.camp "(ref: Establishment)" 2.identity "(ref: Chinese)", nolabel) ///
	stats(control N, label("Demographic controls" "Observations") fmt(%9.0f)) ///
	addnote("Robust standard errors in parentheses" "+ p< 0.10, * p < 0.05, ** p < 0.01, *** p < 0.001. For perceived importance of voting, different scales are used in the two election survey (1-5 in September 2016 and 1-10 in March 2018). They are first transformed to a 0-1 scale and then to a binary variable with a cutoff of >0.5 here.") ///
	label interaction(" x ") replace
